********************************************************************************
* CREATE FINAL DATA SET
* File description: Merge different registers and create variables needed for analysis
********************************************************************************


* Load the 2011 LISA extract, then stack the 2012-2017 extracts underneath it.
* Forward slashes are used in every path: Stata accepts them on all platforms,
* whereas a backslash separator only works on Windows.
use "use/LISA/Lisa_2011", clear

forvalues yr = 2012/2017 {
    append using "use/LISA/Lisa_`yr'"
}

* Keep a single row per person-year. With -force-, surplus rows are dropped
* even when they differ on variables other than LopNr and year.
duplicates drop LopNr year, force

* Lagged income variables, built from each person's year-ordered rows.
* NOTE(review): the lags are positional (previous row of the same LopNr), not
* calendar-based. If a person can have gaps between years, "_lag" is not
* necessarily the previous calendar year -- confirm the panel has no gaps.
foreach inc in dispinc dispinch {
    bysort LopNr (year): generate `inc'_lag  = `inc'[_n-1]
    bysort LopNr (year): generate `inc'_2lag = `inc'[_n-2]
}

* Mean of the three preceding rows' individual disposable income; missing as
* soon as any of the three is missing or does not exist.
bysort LopNr (year): generate dispinc_permanent = (dispinc[_n-1] + dispinc[_n-2] + dispinc[_n-3]) / 3

* Municipality-by-year aggregates. Every statistic below is computed within
* cells defined by muni and year and stored on each row of the cell.

* Decile (1-10) of twice-lagged income within the municipality-year.
* The xtile() egen function is not part of official Stata; it is provided by
* the user-written egenmore package (ssc install egenmore).
bysort muni year: egen percentile_2lag = xtile(dispinc_2lag), nq(10)

* Percentiles of current individual disposable income.
foreach p of numlist 90 80 50 20 25 75 95 {
    bysort muni year: egen p`p'_muni = pctile(dispinc), p(`p')
}

* Percentiles of twice-lagged individual disposable income.
foreach p of numlist 90 80 50 95 {
    bysort muni year: egen p`p'_muni_2lag = pctile(dispinc_2lag), p(`p')
}

bysort muni year: egen share_married = mean(married)

* Low-education indicator (educ <= 11). Rows with missing educ are left
* missing instead of being coded 0, so they no longer count as "not low
* educated" in the share (egen mean() ignores missing values).
generate byte low_educ = (educ <= 11) if !missing(educ)
bysort muni year: egen share_loweduc = mean(low_educ)
drop low_educ

bysort muni year: egen share_female = mean(female)

* Poverty indicator: household income below 60% of the municipality-year
* median. Stata treats missing as larger than any number, so without the
* guard a missing dispinch would be coded "not poor" and a missing median
* would code everyone "poor"; both cases are now left missing.
* NOTE(review): p50_muni is the median of individual income (dispinc) while
* the comparison uses household income (dispinch) -- confirm this is intended.
generate byte poor = (dispinch < 0.6 * p50_muni) if !missing(dispinch, p50_muni)
bysort muni year: egen share_poor = mean(poor)

bysort muni year: egen avg_kids = mean(kids)
bysort muni year: egen avg_age  = mean(age)

bysort muni year: egen share_unemployed   = mean(unemployed)
bysort muni year: egen share_selfemployed = mean(selfemployed)

* Number of rows (persons) in the municipality-year cell.
bysort muni year: generate long population = _N

* Attach municipality-year house prices. Only rows already in memory are kept
* (unmatched rows from the house price file have no LopNr and would break the
* later 1:1 merges on LopNr year), and no _merge variable is left behind,
* matching the other merges in this file.
merge m:1 municipality year using "raw/houseprices", keep(master match) nogenerate

* Lagged house prices per person. The data are no longer sorted by LopNr at
* this point, so a plain "by LopNr:" would stop with a "not sorted" error;
* bysort re-establishes the LopNr/year order before taking the lags.
bysort LopNr (year): generate houseprice_lag  = houseprices[_n-1]
bysort LopNr (year): generate houseprice_lag2 = houseprices[_n-2]

* Years before 2014 were only needed to construct the lags above.
drop if year < 2014

* Add the claims data; person-years that appear only in the claims file are
* not kept.
merge 1:1 LopNr year using "raw/SEA/claims.dta", keep(master match) nogenerate

* A person-year without a claims record gets zero on all claim variables.
foreach v in claim_dummy claim_unpaid claim_tot {
    replace `v' = 0 if missing(`v')
}

* Add the yitz_* files one by one, always keeping only the person-years
* already in memory.
foreach src in muni outliers deso cfarnr muni_perm agegroup {
    merge 1:1 LopNr year using "use/yitz/yitz_`src'", keep(master match) nogenerate
}

* Log and polynomial transformations. log() returns missing for zero or
* negative arguments, so such rows end up with missing log variables.
generate lndispinc  = log(dispinc_2lag)

* Squared twice-lagged income must exist before its log is taken (it was
* previously created only after the line that reads it).
generate dispinc2   = dispinc_2lag^2
generate lndispinc2 = log(dispinc2)
generate lndispinch = log(dispinch_2lag)

generate lnhouseprice = log(houseprices)

generate age2 = age^2

generate lnp20 = log(p20_muni)
generate lnp90 = log(p90_muni)
generate lnp80 = log(p80_muni)
generate lnp50 = log(p50_muni)
generate lnp50_cfarnr   = log(p50_cfarnr)
generate lnp50_deso     = log(p50_deso)
generate lnp50_agegroup = log(p50_agegroup)

* Interactions
* One identifier per (unit, year) cell. egen group() is used instead of
* multiplying the unit code by the year: a product stored as float cannot
* hold large codes exactly (distinct cells collapse into one value), different
* pairs can share the same product, and a product fails on string codes.
* As with the product, the identifier is missing when either part is missing.
egen long cfarnr_year   = group(CfarNr year)
egen long muni_year     = group(municipality year)
egen long agegroup_year = group(agegroup year)
egen long deso_year     = group(deso year)

* Final restrictions and variable changes

* Drop negative incomes. Rows with missing dispinc are kept, because Stata
* treats missing as larger than any number.
drop if dispinc < 0

* Rescale the yitz_* measures by 1/100000.
foreach v in yitz_cfarnr yitz_muni yitz_deso yitz_agegroup yitz_muni_outliers yitz_muni_perm {
    replace `v' = `v' / 100000
}

* Variable labels

* Individual characteristics
label variable age              "Age"
label variable age2             "Age squared"
label variable female           "Female"
label variable kids             "Kids"
label variable married          "Married"
label variable educ             "Education attainment"
label variable unemployed       "Unemployed"
label variable early_retirement "Early retirement"
label variable sick_leave       "Sick leave"
label variable soc_ben          "Social welfare"

* Income
label variable dispinc          "Individual disposable income"
label variable dispinch         "Household disposable income"
label variable dispinc_euro     "Disposable income (EUR 2016)"

* yitz_* measures
label variable yitz_muni          "YRD, municipality"
label variable yitz_cfarnr        "YRD, workplace"
label variable yitz_deso          "YRD, deso"
label variable yitz_agegroup      "YRD, agegroup"
label variable yitz_muni_perm     "YRD, permanent income"
label variable yitz_muni_outliers "YRD, municipality no outliers"

* Municipality-level variables
label variable avg_age      "Average age"
label variable share_poor   "Share poor"
* NOTE(review): lnhouseprice is the log of houseprices, but the label does
* not mention the log -- confirm the wording is intended.
label variable lnhouseprice "Average houseprices"

* Write the analysis file, overwriting any previous version.
save "use/final_data", replace
